library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(glue)
library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Warning: package 'MatrixGenerics' was built under R version 4.3.1
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## 
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## 
## Attaching package: 'MatrixGenerics'
## 
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## 
## Loading required package: GenomicRanges
## Warning: package 'GenomicRanges' was built under R version 4.3.1
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## 
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.3.1
## 
## Attaching package: 'S4Vectors'
## 
## The following objects are masked from 'package:lubridate':
## 
##     second, second<-
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## The following object is masked from 'package:utils':
## 
##     findMatches
## 
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.3.1
## 
## Attaching package: 'IRanges'
## 
## The following object is masked from 'package:glue':
## 
##     trim
## 
## The following object is masked from 'package:lubridate':
## 
##     %within%
## 
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## The following object is masked from 'package:purrr':
## 
##     reduce
## 
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.3.1
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## 
## Attaching package: 'Biobase'
## 
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## 
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(lemur)
## 
## Attaching package: 'lemur'
## 
## The following object is masked from 'package:dplyr':
## 
##     vars
## 
## The following object is masked from 'package:ggplot2':
## 
##     vars
source("util.R")
fit <- qs::qread("../benchmark/output/cable_spatial_plaque_data/fit_small.qs")
nei <- qs::qread("../benchmark/output/cable_spatial_plaque_data/nei_small.qs")
mouse_labels <- structure(paste0("Mouse ", 1:4), names = paste0("mouse_", 1:4))
spatial_plaq_dens_pl <- as_tibble(colData(fit)) %>%
  ggplot(aes(x = x, y = y)) +
    ggrastr::rasterize(geom_point(aes(color = plaque_density), size = 0.05, stroke = 0), dpi = 600) +
    scale_color_gradient(low = "lightgrey", high = "darkorange", limits = c(0, 1), breaks = c(0, 0.5, 1)) +
    facet_wrap(vars(sample), labeller = as_labeller(mouse_labels)) +
    small_axis(label = "spatial coord.", fontsize = font_size_small) +
    labs(color = "Plaque density", subtitle = "Spatial structure") +
    guides(color = guide_colorbar(barheight = unit(1, "mm"))) +
    theme(legend.position = "top", strip.text = element_text(size = font_size_tiny, margin = margin(b = -1, unit = "mm")),
          plot.subtitle = element_text(size = 7))

spatial_plaq_dens_pl

set.seed(1)
umap <- uwot::umap(t(fit$embedding))
as_tibble(colData(fit)) %>%
  mutate(umap = umap) %>%
  ggplot(aes(x = umap[,1], y = umap[,2])) +
    geom_point(aes(color = sample), size = 0.3, stroke = 0) +
    guides(color = guide_legend(override.aes = list(size = 1)))

as_tibble(colData(fit)) %>%
  mutate(umap = umap) %>%
  ggplot(aes(x = umap[,1], y = umap[,2])) +
    geom_point(aes(color = cell_type_lumped), size = 0.3, stroke = 0) +
    guides(color = guide_legend(override.aes = list(size = 1)))

as_tibble(colData(fit)) %>%
  mutate(umap = umap) %>%
  ggplot(aes(x = umap[,1], y = umap[,2])) +
    geom_point(aes(color = plaque_cluster), size = 0.3, stroke = 0) +
    guides(color = guide_legend(override.aes = list(size = 1)))

umap_plt <- as_tibble(colData(fit)) %>%
  mutate(umap = umap) %>%
  ggplot(aes(x = umap[,2], y = umap[,1])) +
    ggrastr::rasterize(geom_point(aes(color = plaque_density), size = 0.1, stroke = 0, show.legend = FALSE), dpi = 600) +
    scale_color_gradient(low = "lightgrey", high = "darkorange", limits = c(0, 1), breaks = c(0, 0.5, 1))  +
    small_axis(label = "UMAP", fontsize = font_size_small) +
    labs(subtitle = "Latent structure") +
    theme(plot.subtitle = element_text(size = 7))

umap_plt

Make cell type plot

table(fit$colData$cell_type_lumped, fit$colData$cell_type, useNA = "always")
##                  
##                   ASTROCYTE CHOROID_PLEXUS ENDOTHELIAL_STALK ENDOTHELIAL_TIP
##   ASTROCYTE            8659              0                 0               0
##   MICROGLIA               0              0                 0               0
##   NEUROGENESIS            0              0                 0               0
##   NEURON                  0              0                 0               0
##   OLIGODENDROCYTE         0              0                 0               0
##   Other                   0              2               259             790
##   <NA>                    0              0                 0               0
##                  
##                   MACROPHAGE MICROGLIA MURAL NEUROGENESIS NEURON
##   ASTROCYTE                0         0     0            0      0
##   MICROGLIA                0       816     0            0      0
##   NEUROGENESIS             0         0     0          519      0
##   NEURON                   0         0     0            0  23032
##   OLIGODENDROCYTE          0         0     0            0      0
##   Other                   61         0    86            0      0
##   <NA>                     0         0     0            0      0
##                  
##                   OLIGODENDROCYTE POLYDENDROCYTE  <NA>
##   ASTROCYTE                     0              0     0
##   MICROGLIA                     0              0     0
##   NEUROGENESIS                  0              0     0
##   NEURON                        0              0     0
##   OLIGODENDROCYTE            6308              0     0
##   Other                         0            523     0
##   <NA>                          0              0     0
celltype_broad_plt <- as_tibble(colData(fit)) %>%
  mutate(umap = umap) %>%
  ggplot(aes(x = umap[,2], y = umap[,1])) +
    ggrastr::rasterize(geom_point(aes(color = cell_type_lumped), size = 0.1, stroke = 0, show.legend = FALSE), dpi = 600) +
    ggrepel::geom_label_repel(data = . %>% summarize(umap = matrix(colMedians(umap), nrow = 1), .by = cell_type_lumped), 
                              aes(label = cell_type_lumped, color = cell_type_lumped), size = font_size_small / .pt, show.legend = FALSE, max.overlaps = 100) +
    scale_x_continuous(limits = range(umap[,2]) * 1.5) +
    small_axis(label = "UMAP", fontsize = font_size_small) +
    labs(title = "(B) Mouse hypocampus: broad cell type labels") 
cowplot::save_plot("../plots/suppl_alzheimer_cell_types.pdf", celltype_broad_plt, base_height = 8, base_width = 8, units = "cm")
genes_of_interest <- nei$name[1]
mask <- matrix(NA, nrow = length(genes_of_interest), ncol = ncol(fit), 
               dimnames = list(genes_of_interest, colnames(fit)))
mask2 <- matrix(1, nrow = length(genes_of_interest), ncol = ncol(fit), 
               dimnames = list(genes_of_interest, colnames(fit)))
for(id in genes_of_interest){
  mask[id, filter(nei, name == id)$neighborhood[[1]]] <- 1
  mask2[id, filter(nei, name == id)$neighborhood[[1]]] <- NA
}

psce2 <- glmGamPoi::pseudobulk(SingleCellExperiment(list(inside = as.matrix(logcounts(fit)[genes_of_interest,,drop=FALSE] * mask),
                                                        outside = as.matrix(logcounts(fit)[genes_of_interest,,drop=FALSE] * mask2))),
                      group_by = vars(sample, plaque_cluster), n = n(),
                      aggregation_functions = list(.default = \(...) matrixStats::rowMeans2(..., na.rm = TRUE)),
                      col_data = as.data.frame(colData(fit)))
## Aggregating assay 'inside' using 'custom function'.
## Aggregating assay 'outside' using 'custom function'.
comparison_data <- as_tibble(colData(psce2)) %>%
  mutate(expr_inside = as_tibble(t(assay(psce2, "inside"))),
         expr_outside = as_tibble(t(assay(psce2, "outside")))) %>%
  unpack(starts_with("expr"), names_sep = "-") %>%
  pivot_longer(starts_with("expr"), names_sep = "[-_]", names_to = c(".value", "origin", "symbol")) 



expr_comparison_pl <- comparison_data %>%
  mutate(plaque_cluster = ordered(plaque_cluster, levels = levels(fit$colData$plaque_cluster))) %>% 
  ggplot(aes(x = plaque_cluster, y = expr)) +
    geom_point(aes(color = origin), size = 0.3) +
    geom_smooth(aes(color = origin, x = as.integer(plaque_cluster)), span = 1.5, se = FALSE, linewidth = 2) +
    scale_color_manual(values = c("inside" = "black", "outside" = "lightgrey"), labels = c("inside" = "Cells in neighborhood", "outside" = "All other cells")) +
    scale_y_continuous(limits = c(0, 0.3), expand = expansion(add = 0)) +
    guides(x = guide_axis(angle = 45)) +
    theme(axis.title.x = element_blank(), legend.position = "bottom") +
    labs(color = "", y = "Expression",
         subtitle = "\\emph{Jun} expr. vs. plaque density")  +
    theme(plot.subtitle = element_text(size = 7))

expr_comparison_pl
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

is_inside <- tibble(symbol = genes_of_interest, cell_id = list(colnames(fit))) %>%
  left_join(dplyr::select(nei, name, neighborhood), by = c("symbol"= "name")) %>%
  mutate(inside = map2(cell_id, neighborhood, \(ref, nei_cells) ref %in% nei_cells)) %>%
  dplyr::select(-neighborhood) %>%
  unnest(c(inside, cell_id))

de_plot_data <- as_tibble(colData(fit), rownames = "cell_id") %>%
  mutate(umap = umap) %>%
  mutate(de = as_tibble(t(assay(fit[genes_of_interest,], "DE")))) %>%
  unnest(de, names_sep = "-") %>%
  pivot_longer(starts_with("de-"), names_sep = "-", values_to = "de", names_to = c(NA, "symbol")) %>%
  inner_join(is_inside, by = c("symbol", "cell_id")) %>%
  sample_frac(size = 1) 

abs_max <- max(abs(quantile(de_plot_data$de, c(0.95, 0.05))))

jun_de_pl <- de_plot_data %>%
  mutate(inside = ifelse(inside, "in", "out")) %>%
  mutate(inside = factor(inside, levels = c("in", "out")))  %>%
  ggplot(aes(x = umap[,2], y = umap[,1])) +
    ggrastr::rasterise(geom_point(aes(color = de), size = 0.05, stroke = 0), dpi = 600) +
    # scale_colour_gradient2_rev(limits = c(-1, 1) * abs_max, oob = scales::squish, breaks = c(-1, 0, 1) * signif_to_zero(abs_max, 1), mid = "lightgrey") +
    scale_color_de_gradient(abs_max, mid_width = 0.2) +
    facet_grid(vars(inside), labeller = labeller(inside = as_labeller(c("in" = "Cells in Neighb.", "out" = "All other cells"))),
               switch="y") +
    small_axis("UMAP", fontsize = font_size_small) +
    theme(legend.position = "bottom", legend.margin = margin(t = -2, unit = "mm")) +
    guides(color = guide_colorbar(barheight = unit(1, "mm"), barwidth = unit(15, "mm"), title.vjust = 1)) +
    labs(color = "$\\Delta$")

jun_de_pl

rel_plt <- colData(fit) %>%
  as_tibble(rownames = "cell_id") %>%
  left_join(is_inside) %>%
  dplyr::count(plaque_cluster, symbol, inside) %>%
  mutate(frac = n / sum(n), .by = c(symbol, inside)) %>%
  mutate(plaque_cluster = fct_relabel(plaque_cluster, \(x) str_replace(x, ",", ", "))) %>%
  ggplot(aes(x = plaque_cluster, y = frac)) +
    geom_col(aes(fill = inside), position = position_dodge2()) +
    scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
    scale_x_discrete(position = "top") +
    scale_fill_manual(values = c("TRUE" = "black", "FALSE" = "lightgrey")) +
    guides(x = guide_axis(angle = 90)) +
    theme(axis.title.x = element_blank(), legend.position = "bottom") +
    labs(color = "", y = "Rel. No. Cells")
## Joining with `by = join_by(cell_id)`
rel_plt

plot_assemble(
  add_text("(D) Alzheimer plaque density", x = 2.7, y = 1, fontsize = font_size, vjust = 1, fontface = "bold"),
  add_plot(cowplot::get_legend(spatial_plaq_dens_pl + theme(legend.text = element_text(size = font_size)) + labs(color = "")), x = 43, y = 1, height = 5),
  add_plot(spatial_plaq_dens_pl + guides(color = "none"), x = 0, y = 5, width = 60, height = 61),
  add_plot(umap_plt, x = 52, y = 5, width = 40, height = 61),
  add_text("(E) Expression of \\emph{Jun} depends on the plaque density",
           x = 95, y = 1, fontsize = font_size, vjust = 1, fontface = "bold"),
  add_text("Pred.~\\emph{Jun} diff.~expr.",
           x = 97.5, y = 6.5, fontsize = 7, vjust = 1, fontface = "plain"),
  add_plot(jun_de_pl, x = 95, y = 4.5, width = 25, height = 61.5),
  add_plot(expr_comparison_pl + guides(color = "none") + theme(axis.text.x = element_blank()), x = 120, y = 5, width = 50, height = 30),
  add_plot(rel_plt + guides(fill = "none"), x = 120, y = 34, width = 50, height = 26),
  add_plot(cowplot::get_legend(expr_comparison_pl), x = 125, y = 60, width = 60, height = 5),
  
  add_graphic("../plots/schematic_elements/alzheimer_circle.pdf", x = -1,  y = -1, units = "mm"),
  
  width = 170, height = 65, units = "mm", show_grid_lines = FALSE,
  latex_support = TRUE, filename = "../plots/continuous_covar_figure_part2.pdf"
)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## gg[gg1]
## 
## gg[gg2]
## 
## gg[gg3]
## 
## gg[gg4]
## 
## gg[gg5]
## 
## gg[gg6]
## 
## gg[gg7]
## 
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## gg[gg8]
## 
## gg[gg9]
## 
## gg[gg10]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE